Projekt z analizy danych

Politechnika Poznańska

Kamila Ziemba

2 grudnia 2017

Podsumowanie analizy

Lalalala

Kod pokazujący wykorzystane biblioteki

library('knitr')
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dprep)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library("highcharter")
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use

Kod pozwalający wczytać dane z pliku

Kod przetwarzający brakujące dane

# Replace zeros in the measurement columns of `dane` with the column mean.
#
# For every column listed below, each value exactly equal to 0 is replaced by
# the mean of that column; all other values are kept unchanged. The mean is
# computed before the replacement and with `na.rm = TRUE`, so an NA in a
# column cannot turn the replacement value (and with it every zero) into NA.
# NA entries themselves stay NA.
#
# NOTE(review): the mean is taken over all values, including the zeros that
# are about to be replaced, which pulls the imputed value towards 0. If zeros
# really encode missing readings, averaging only the non-zero values may be
# more appropriate -- confirm with the analysis author before changing it.
kolumny_do_uzupelnienia <- c(
  "kwh", "irri_pvgis_mod", "irr_pvgis_mod",
  paste0("pcnm", 15:1),
  "altitudei", "altitude", "dist",
  "cloudcoveri", "windbearingi", "dewpointi", "humidityi",
  "cloudcover", "windbearing", "dewpoint",
  "icon", "humidity", "windspeed", "pressure"
)

# Fail early with a readable message instead of an obscure error from the
# first missing column inside the loop.
brakujace_kolumny <- setdiff(kolumny_do_uzupelnienia, names(dane))
if (length(brakujace_kolumny) > 0) {
  stop(
    "Missing columns in `dane`: ",
    paste(brakujace_kolumny, collapse = ", "),
    call. = FALSE
  )
}

for (kolumna in kolumny_do_uzupelnienia) {
  wartosci <- dane[[kolumna]]
  srednia <- mean(wartosci, na.rm = TRUE)
  # ifelse() keeps non-zero values as they are and propagates NA for NA input.
  dane[[kolumna]] <- ifelse(wartosci == 0, srednia, wartosci)
}

Sekcja podsumowująca rozmiar zbioru i podstawowe statystyki

Rozmiar zbioru:

## [1] 230141     51

Podstawowe statystyki:

##        id             idsito          idmodel         idbrand      
##  Min.   :     1   Min.   :0.0000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.: 99880   1st Qu.:0.1000   1st Qu.:0.167   1st Qu.:0.0830  
##  Median :158870   Median :0.2250   Median :0.208   Median :0.1670  
##  Mean   :153039   Mean   :0.2153   Mean   :0.243   Mean   :0.1514  
##  3rd Qu.:217634   3rd Qu.:0.3250   3rd Qu.:0.292   3rd Qu.:0.1670  
##  Max.   :276488   Max.   :0.4250   Max.   :0.750   Max.   :0.4170  
##                                                                    
##       lat              lon          ageinmonths          anno     
##  Min.   :0.4150   Min.   :0.1540   Min.   :0.0000   Min.   :2012  
##  1st Qu.:0.4370   1st Qu.:0.6200   1st Qu.:0.0000   1st Qu.:2012  
##  Median :0.4370   Median :0.6240   Median :0.1250   Median :2012  
##  Mean   :0.4498   Mean   :0.5699   Mean   :0.3103   Mean   :2012  
##  3rd Qu.:0.4390   3rd Qu.:0.6300   3rd Qu.:0.7190   3rd Qu.:2013  
##  Max.   :0.5530   Max.   :0.6910   Max.   :1.0000   Max.   :2013  
##                                                                   
##       day              ora                     data       
##  Min.   :0.0000   Min.   :0.0000   1/1/2013 10:00:    17  
##  1st Qu.:0.2470   1st Qu.:0.2220   1/1/2013 11:00:    17  
##  Median :0.4770   Median :0.5000   1/1/2013 12:00:    17  
##  Mean   :0.4694   Mean   :0.4999   1/1/2013 13:00:    17  
##  3rd Qu.:0.6880   3rd Qu.:0.7780   1/1/2013 14:00:    17  
##  Max.   :1.0000   Max.   :1.0000   1/1/2013 15:00:    17  
##                                    (Other)       :230039  
##  temperatura_ambiente  irradiamento       pressure        windspeed      
##  Min.   :0.045        Min.   :0.0000   Min.   :0.6495   Min.   :0.00100  
##  1st Qu.:0.212        1st Qu.:0.0000   1st Qu.:0.7480   1st Qu.:0.04200  
##  Median :0.348        Median :0.0370   Median :0.7530   Median :0.06700  
##  Mean   :0.375        Mean   :0.1105   Mean   :0.7387   Mean   :0.07683  
##  3rd Qu.:0.530        3rd Qu.:0.2080   3rd Qu.:0.7550   3rd Qu.:0.10300  
##  Max.   :0.818        Max.   :0.7100   Max.   :0.7680   Max.   :0.69600  
##                                                                          
##     humidity          icon           dewpoint       windbearing    
##  Min.   :0.160   Min.   :0.0830   Min.   :0.1390   Min.   :0.0020  
##  1st Qu.:0.530   1st Qu.:0.4678   1st Qu.:0.5360   1st Qu.:0.3080  
##  Median :0.690   Median :0.6670   Median :0.6220   Median :0.4730  
##  Mean   :0.681   Mean   :0.5570   Mean   :0.6071   Mean   :0.4546  
##  3rd Qu.:0.840   3rd Qu.:0.6670   3rd Qu.:0.6840   3rd Qu.:0.6600  
##  Max.   :1.000   Max.   :0.7500   Max.   :0.8650   Max.   :0.7690  
##                                                                    
##    cloudcover         tempi             irri          pressurei        
##  Min.   :0.0100   Min.   :0.0090   Min.   :0.1080   Min.   :0.0000000  
##  1st Qu.:0.3100   1st Qu.:0.0740   1st Qu.:0.2160   1st Qu.:0.0000000  
##  Median :0.3589   Median :0.1110   Median :0.2200   Median :0.0000000  
##  Mean   :0.4162   Mean   :0.1234   Mean   :0.2221   Mean   :0.0002186  
##  3rd Qu.:0.5100   3rd Qu.:0.1270   3rd Qu.:0.2220   3rd Qu.:0.0000000  
##  Max.   :1.0000   Max.   :0.9830   Max.   :1.0000   Max.   :1.0000000  
##                                                                        
##    windspeedi        humidityi         dewpointi       windbearingi   
##  Min.   :0.00000   Min.   :0.03400   Min.   :0.0630   Min.   :0.0400  
##  1st Qu.:0.03700   1st Qu.:0.04400   1st Qu.:0.1140   1st Qu.:0.3360  
##  Median :0.03800   Median :0.04400   Median :0.1140   Median :0.3360  
##  Mean   :0.03852   Mean   :0.06383   Mean   :0.1194   Mean   :0.3456  
##  3rd Qu.:0.03900   3rd Qu.:0.06200   3rd Qu.:0.1180   3rd Qu.:0.3390  
##  Max.   :1.00000   Max.   :0.57900   Max.   :0.4150   Max.   :1.0000  
##                                                                       
##   cloudcoveri          dist             altitude         azimuth      
##  Min.   :0.0490   Min.   :0.005464   Min.   :0.1110   Min.   :0.1280  
##  1st Qu.:0.1960   1st Qu.:0.185792   1st Qu.:0.4210   1st Qu.:0.2950  
##  Median :0.1960   Median :0.448087   Median :0.5660   Median :0.4230  
##  Mean   :0.2059   Mean   :0.459202   Mean   :0.5488   Mean   :0.4542  
##  3rd Qu.:0.1980   3rd Qu.:0.704918   3rd Qu.:0.6840   3rd Qu.:0.6360  
##  Max.   :1.0000   Max.   :1.000000   Max.   :0.8840   Max.   :0.8180  
##                                                                       
##    altitudei         azimuthi          pcnm1            pcnm2       
##  Min.   :0.0080   Min.   :0.0000   Min.   :0.3750   Min.   :0.0950  
##  1st Qu.:0.0960   1st Qu.:0.2090   1st Qu.:0.3780   1st Qu.:0.2710  
##  Median :0.1370   Median :0.2900   Median :0.3790   Median :0.3770  
##  Mean   :0.2054   Mean   :0.3667   Mean   :0.4473   Mean   :0.3758  
##  3rd Qu.:0.2660   3rd Qu.:0.4860   3rd Qu.:0.3840   3rd Qu.:0.4220  
##  Max.   :0.9820   Max.   :1.0000   Max.   :1.0000   Max.   :0.9720  
##                                                                     
##      pcnm3            pcnm4            pcnm5            pcnm6       
##  Min.   :0.2850   Min.   :0.0580   Min.   :0.0410   Min.   :0.2350  
##  1st Qu.:0.5660   1st Qu.:0.4380   1st Qu.:0.4030   1st Qu.:0.3580  
##  Median :0.6050   Median :0.5310   Median :0.4270   Median :0.4930  
##  Mean   :0.6385   Mean   :0.5484   Mean   :0.4405   Mean   :0.5236  
##  3rd Qu.:0.7300   3rd Qu.:0.6340   3rd Qu.:0.4620   3rd Qu.:0.4949  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                     
##      pcnm7            pcnm8           pcnm9            pcnm10      
##  Min.   :0.0110   Min.   :0.041   Min.   :0.0720   Min.   :0.4320  
##  1st Qu.:0.0400   1st Qu.:0.217   1st Qu.:0.5320   1st Qu.:0.6190  
##  Median :0.0600   Median :0.412   Median :0.5320   Median :0.6190  
##  Mean   :0.1203   Mean   :0.427   Mean   :0.5708   Mean   :0.6653  
##  3rd Qu.:0.1140   3rd Qu.:0.511   3rd Qu.:0.6000   3rd Qu.:0.7170  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##      pcnm11           pcnm12          pcnm13           pcnm14      
##  Min.   :0.0640   Min.   :0.498   Min.   :0.1370   Min.   :0.3650  
##  1st Qu.:0.3120   1st Qu.:0.748   1st Qu.:0.6140   1st Qu.:0.4730  
##  Median :0.3270   Median :0.760   Median :0.6140   Median :0.4730  
##  Mean   :0.3445   Mean   :0.801   Mean   :0.6498   Mean   :0.5172  
##  3rd Qu.:0.3270   3rd Qu.:0.884   3rd Qu.:0.7380   3rd Qu.:0.5300  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##      pcnm15       irr_pvgis_mod    irri_pvgis_mod        kwh        
##  Min.   :0.1500   Min.   :0.0010   Min.   :-0.025   Min.   :0.0010  
##  1st Qu.:0.6120   1st Qu.:0.1793   1st Qu.: 0.158   1st Qu.:0.1070  
##  Median :0.6140   Median :0.1793   Median : 0.194   Median :0.1702  
##  Mean   :0.6044   Mean   :0.2499   Mean   : 0.197   Mean   :0.2263  
##  3rd Qu.:0.6150   3rd Qu.:0.3300   3rd Qu.: 0.214   3rd Qu.:0.3350  
##  Max.   :1.0000   Max.   :1.0000   Max.   : 1.006   Max.   :1.0000  
## 
## [1] 230141
## [1] 51

Szczegółowa analiza wartości atrybutów (np. poprzez prezentację rozkładów wartości).

Szczegółowa analiza wartości atrybutów

Temperatura i kwh

Wilgotność i kwh

Wykres słupkowy kwh według lat

Interaktywny wykres prezentujący zmianę wytwarzanej energii w czasie i przestrzeni